import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
life_expectancy_df = pd.read_csv('Life_Expectancy_Data.csv')
life_expectancy_df
| Year | Status | Life expectancy | Adult Mortality | infant deaths | Alcohol | percentage expenditure | Hepatitis B | Measles | BMI | ... | Polio | Total expenditure | Diphtheria | HIV/AIDS | GDP | Population | thinness 1-19 years | thinness 5-9 years | Income composition of resources | Schooling | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2015 | Developing | 65.0 | 263.0 | 62 | 0.01 | 71.279624 | 65.0 | 1154 | 19.1 | ... | 6.0 | 8.16 | 65.0 | 0.1 | 584.259210 | 33736494.0 | 17.2 | 17.3 | 0.479 | 10.1 |
| 1 | 2014 | Developing | 59.9 | 271.0 | 64 | 0.01 | 73.523582 | 62.0 | 492 | 18.6 | ... | 58.0 | 8.18 | 62.0 | 0.1 | 612.696514 | 327582.0 | 17.5 | 17.5 | 0.476 | 10.0 |
| 2 | 2013 | Developing | 59.9 | 268.0 | 66 | 0.01 | 73.219243 | 64.0 | 430 | 18.1 | ... | 62.0 | 8.13 | 64.0 | 0.1 | 631.744976 | 31731688.0 | 17.7 | 17.7 | 0.470 | 9.9 |
| 3 | 2012 | Developing | 59.5 | 272.0 | 69 | 0.01 | 78.184215 | 67.0 | 2787 | 17.6 | ... | 67.0 | 8.52 | 67.0 | 0.1 | 669.959000 | 3696958.0 | 17.9 | 18.0 | 0.463 | 9.8 |
| 4 | 2011 | Developing | 59.2 | 275.0 | 71 | 0.01 | 7.097109 | 68.0 | 3013 | 17.2 | ... | 68.0 | 7.87 | 68.0 | 0.1 | 63.537231 | 2978599.0 | 18.2 | 18.2 | 0.454 | 9.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2933 | 2004 | Developing | 44.3 | 723.0 | 27 | 4.36 | 0.000000 | 68.0 | 31 | 27.1 | ... | 67.0 | 7.13 | 65.0 | 33.6 | 454.366654 | 12777511.0 | 9.4 | 9.4 | 0.407 | 9.2 |
| 2934 | 2003 | Developing | 44.5 | 715.0 | 26 | 4.06 | 0.000000 | 7.0 | 998 | 26.7 | ... | 7.0 | 6.52 | 68.0 | 36.7 | 453.351155 | 12633897.0 | 9.8 | 9.9 | 0.418 | 9.5 |
| 2935 | 2002 | Developing | 44.8 | 73.0 | 25 | 4.43 | 0.000000 | 73.0 | 304 | 26.3 | ... | 73.0 | 6.53 | 71.0 | 39.8 | 57.348340 | 125525.0 | 1.2 | 1.3 | 0.427 | 10.0 |
| 2936 | 2001 | Developing | 45.3 | 686.0 | 25 | 1.72 | 0.000000 | 76.0 | 529 | 25.9 | ... | 76.0 | 6.16 | 75.0 | 42.1 | 548.587312 | 12366165.0 | 1.6 | 1.7 | 0.427 | 9.8 |
| 2937 | 2000 | Developing | 46.0 | 665.0 | 24 | 1.68 | 0.000000 | 79.0 | 1483 | 25.5 | ... | 78.0 | 7.10 | 78.0 | 43.5 | 547.358879 | 12222251.0 | 11.0 | 11.2 | 0.434 | 9.8 |
2938 rows × 21 columns
sns.heatmap(life_expectancy_df.isnull(), yticklabels = False, cbar = False, cmap="Blues")
<AxesSubplot:>
life_expectancy_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2938 entries, 0 to 2937 Data columns (total 21 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Year 2938 non-null int64 1 Status 2938 non-null object 2 Life expectancy 2928 non-null float64 3 Adult Mortality 2928 non-null float64 4 infant deaths 2938 non-null int64 5 Alcohol 2744 non-null float64 6 percentage expenditure 2938 non-null float64 7 Hepatitis B 2385 non-null float64 8 Measles 2938 non-null int64 9 BMI 2904 non-null float64 10 under-five deaths 2938 non-null int64 11 Polio 2919 non-null float64 12 Total expenditure 2712 non-null float64 13 Diphtheria 2919 non-null float64 14 HIV/AIDS 2938 non-null float64 15 GDP 2490 non-null float64 16 Population 2286 non-null float64 17 thinness 1-19 years 2904 non-null float64 18 thinness 5-9 years 2904 non-null float64 19 Income composition of resources 2771 non-null float64 20 Schooling 2775 non-null float64 dtypes: float64(16), int64(4), object(1) memory usage: 482.1+ KB
life_expectancy_df.describe()
| Year | Life expectancy | Adult Mortality | infant deaths | Alcohol | percentage expenditure | Hepatitis B | Measles | BMI | under-five deaths | Polio | Total expenditure | Diphtheria | HIV/AIDS | GDP | Population | thinness 1-19 years | thinness 5-9 years | Income composition of resources | Schooling | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2938.000000 | 2928.000000 | 2928.000000 | 2938.000000 | 2744.000000 | 2938.000000 | 2385.000000 | 2938.000000 | 2904.000000 | 2938.000000 | 2919.000000 | 2712.00000 | 2919.000000 | 2938.000000 | 2490.000000 | 2.286000e+03 | 2904.000000 | 2904.000000 | 2771.000000 | 2775.000000 |
| mean | 2007.518720 | 69.224932 | 164.796448 | 30.303948 | 4.602861 | 738.251295 | 80.940461 | 2419.592240 | 38.321247 | 42.035739 | 82.550188 | 5.93819 | 82.324084 | 1.742103 | 7483.158469 | 1.275338e+07 | 4.839704 | 4.870317 | 0.627551 | 11.992793 |
| std | 4.613841 | 9.523867 | 124.292079 | 117.926501 | 4.052413 | 1987.914858 | 25.070016 | 11467.272489 | 20.044034 | 160.445548 | 23.428046 | 2.49832 | 23.716912 | 5.077785 | 14270.169342 | 6.101210e+07 | 4.420195 | 4.508882 | 0.210904 | 3.358920 |
| min | 2000.000000 | 36.300000 | 1.000000 | 0.000000 | 0.010000 | 0.000000 | 1.000000 | 0.000000 | 1.000000 | 0.000000 | 3.000000 | 0.37000 | 2.000000 | 0.100000 | 1.681350 | 3.400000e+01 | 0.100000 | 0.100000 | 0.000000 | 0.000000 |
| 25% | 2004.000000 | 63.100000 | 74.000000 | 0.000000 | 0.877500 | 4.685343 | 77.000000 | 0.000000 | 19.300000 | 0.000000 | 78.000000 | 4.26000 | 78.000000 | 0.100000 | 463.935626 | 1.957932e+05 | 1.600000 | 1.500000 | 0.493000 | 10.100000 |
| 50% | 2008.000000 | 72.100000 | 144.000000 | 3.000000 | 3.755000 | 64.912906 | 92.000000 | 17.000000 | 43.500000 | 4.000000 | 93.000000 | 5.75500 | 93.000000 | 0.100000 | 1766.947595 | 1.386542e+06 | 3.300000 | 3.300000 | 0.677000 | 12.300000 |
| 75% | 2012.000000 | 75.700000 | 228.000000 | 22.000000 | 7.702500 | 441.534144 | 97.000000 | 360.250000 | 56.200000 | 28.000000 | 97.000000 | 7.49250 | 97.000000 | 0.800000 | 5910.806335 | 7.420359e+06 | 7.200000 | 7.200000 | 0.779000 | 14.300000 |
| max | 2015.000000 | 89.000000 | 723.000000 | 1800.000000 | 17.870000 | 19479.911610 | 99.000000 | 212183.000000 | 87.300000 | 2500.000000 | 99.000000 | 17.60000 | 99.000000 | 50.600000 | 119172.741800 | 1.293859e+09 | 27.700000 | 28.600000 | 0.948000 | 20.700000 |
# Plot the histogram
life_expectancy_df.hist(bins = 30, figsize = (20, 20), color = 'b');
plt.figure(figsize = (20,20))
sns.pairplot(life_expectancy_df)
<seaborn.axisgrid.PairGrid at 0x2b699880220>
<Figure size 1440x1440 with 0 Axes>
sns.scatterplot(data = life_expectancy_df, x = 'Schooling', y = 'Life expectancy ')
<AxesSubplot:xlabel='Schooling', ylabel='Life expectancy '>
sns.scatterplot(data = life_expectancy_df, x = 'GDP', y = 'Life expectancy ')
<AxesSubplot:xlabel='GDP', ylabel='Life expectancy '>
sns.scatterplot(data = life_expectancy_df, x = 'Income composition of resources', y = 'Life expectancy ')
<AxesSubplot:xlabel='Income composition of resources', ylabel='Life expectancy '>
sns.scatterplot(data = life_expectancy_df, x = 'Population', y = 'Schooling')
<AxesSubplot:xlabel='Population', ylabel='Schooling'>
sns.scatterplot(data = life_expectancy_df, x = ' HIV/AIDS', y = 'Life expectancy ')
<AxesSubplot:xlabel=' HIV/AIDS', ylabel='Life expectancy '>
sns.scatterplot(data = life_expectancy_df, x = 'Income composition of resources', y = 'Life expectancy ')
<AxesSubplot:xlabel='Income composition of resources', ylabel='Life expectancy '>
plt.figure(figsize = (20,20))
corr_matrix = life_expectancy_df.corr()
sns.heatmap(corr_matrix, annot = True)
plt.show()
life_expectancy_df
| Year | Status | Life expectancy | Adult Mortality | infant deaths | Alcohol | percentage expenditure | Hepatitis B | Measles | BMI | ... | Polio | Total expenditure | Diphtheria | HIV/AIDS | GDP | Population | thinness 1-19 years | thinness 5-9 years | Income composition of resources | Schooling | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2015 | Developing | 65.0 | 263.0 | 62 | 0.01 | 71.279624 | 65.0 | 1154 | 19.1 | ... | 6.0 | 8.16 | 65.0 | 0.1 | 584.259210 | 33736494.0 | 17.2 | 17.3 | 0.479 | 10.1 |
| 1 | 2014 | Developing | 59.9 | 271.0 | 64 | 0.01 | 73.523582 | 62.0 | 492 | 18.6 | ... | 58.0 | 8.18 | 62.0 | 0.1 | 612.696514 | 327582.0 | 17.5 | 17.5 | 0.476 | 10.0 |
| 2 | 2013 | Developing | 59.9 | 268.0 | 66 | 0.01 | 73.219243 | 64.0 | 430 | 18.1 | ... | 62.0 | 8.13 | 64.0 | 0.1 | 631.744976 | 31731688.0 | 17.7 | 17.7 | 0.470 | 9.9 |
| 3 | 2012 | Developing | 59.5 | 272.0 | 69 | 0.01 | 78.184215 | 67.0 | 2787 | 17.6 | ... | 67.0 | 8.52 | 67.0 | 0.1 | 669.959000 | 3696958.0 | 17.9 | 18.0 | 0.463 | 9.8 |
| 4 | 2011 | Developing | 59.2 | 275.0 | 71 | 0.01 | 7.097109 | 68.0 | 3013 | 17.2 | ... | 68.0 | 7.87 | 68.0 | 0.1 | 63.537231 | 2978599.0 | 18.2 | 18.2 | 0.454 | 9.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2933 | 2004 | Developing | 44.3 | 723.0 | 27 | 4.36 | 0.000000 | 68.0 | 31 | 27.1 | ... | 67.0 | 7.13 | 65.0 | 33.6 | 454.366654 | 12777511.0 | 9.4 | 9.4 | 0.407 | 9.2 |
| 2934 | 2003 | Developing | 44.5 | 715.0 | 26 | 4.06 | 0.000000 | 7.0 | 998 | 26.7 | ... | 7.0 | 6.52 | 68.0 | 36.7 | 453.351155 | 12633897.0 | 9.8 | 9.9 | 0.418 | 9.5 |
| 2935 | 2002 | Developing | 44.8 | 73.0 | 25 | 4.43 | 0.000000 | 73.0 | 304 | 26.3 | ... | 73.0 | 6.53 | 71.0 | 39.8 | 57.348340 | 125525.0 | 1.2 | 1.3 | 0.427 | 10.0 |
| 2936 | 2001 | Developing | 45.3 | 686.0 | 25 | 1.72 | 0.000000 | 76.0 | 529 | 25.9 | ... | 76.0 | 6.16 | 75.0 | 42.1 | 548.587312 | 12366165.0 | 1.6 | 1.7 | 0.427 | 9.8 |
| 2937 | 2000 | Developing | 46.0 | 665.0 | 24 | 1.68 | 0.000000 | 79.0 | 1483 | 25.5 | ... | 78.0 | 7.10 | 78.0 | 43.5 | 547.358879 | 12222251.0 | 11.0 | 11.2 | 0.434 | 9.8 |
2938 rows × 21 columns
# Checking the unique values in country to consider it as a categorical variable
life_expectancy_df['Status'].nunique()
2
life_expectancy_df = pd.get_dummies(life_expectancy_df, columns = ['Status'])
life_expectancy_df
| Year | Life expectancy | Adult Mortality | infant deaths | Alcohol | percentage expenditure | Hepatitis B | Measles | BMI | under-five deaths | ... | Diphtheria | HIV/AIDS | GDP | Population | thinness 1-19 years | thinness 5-9 years | Income composition of resources | Schooling | Status_Developed | Status_Developing | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2015 | 65.0 | 263.0 | 62 | 0.01 | 71.279624 | 65.0 | 1154 | 19.1 | 83 | ... | 65.0 | 0.1 | 584.259210 | 33736494.0 | 17.2 | 17.3 | 0.479 | 10.1 | 0 | 1 |
| 1 | 2014 | 59.9 | 271.0 | 64 | 0.01 | 73.523582 | 62.0 | 492 | 18.6 | 86 | ... | 62.0 | 0.1 | 612.696514 | 327582.0 | 17.5 | 17.5 | 0.476 | 10.0 | 0 | 1 |
| 2 | 2013 | 59.9 | 268.0 | 66 | 0.01 | 73.219243 | 64.0 | 430 | 18.1 | 89 | ... | 64.0 | 0.1 | 631.744976 | 31731688.0 | 17.7 | 17.7 | 0.470 | 9.9 | 0 | 1 |
| 3 | 2012 | 59.5 | 272.0 | 69 | 0.01 | 78.184215 | 67.0 | 2787 | 17.6 | 93 | ... | 67.0 | 0.1 | 669.959000 | 3696958.0 | 17.9 | 18.0 | 0.463 | 9.8 | 0 | 1 |
| 4 | 2011 | 59.2 | 275.0 | 71 | 0.01 | 7.097109 | 68.0 | 3013 | 17.2 | 97 | ... | 68.0 | 0.1 | 63.537231 | 2978599.0 | 18.2 | 18.2 | 0.454 | 9.5 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2933 | 2004 | 44.3 | 723.0 | 27 | 4.36 | 0.000000 | 68.0 | 31 | 27.1 | 42 | ... | 65.0 | 33.6 | 454.366654 | 12777511.0 | 9.4 | 9.4 | 0.407 | 9.2 | 0 | 1 |
| 2934 | 2003 | 44.5 | 715.0 | 26 | 4.06 | 0.000000 | 7.0 | 998 | 26.7 | 41 | ... | 68.0 | 36.7 | 453.351155 | 12633897.0 | 9.8 | 9.9 | 0.418 | 9.5 | 0 | 1 |
| 2935 | 2002 | 44.8 | 73.0 | 25 | 4.43 | 0.000000 | 73.0 | 304 | 26.3 | 40 | ... | 71.0 | 39.8 | 57.348340 | 125525.0 | 1.2 | 1.3 | 0.427 | 10.0 | 0 | 1 |
| 2936 | 2001 | 45.3 | 686.0 | 25 | 1.72 | 0.000000 | 76.0 | 529 | 25.9 | 39 | ... | 75.0 | 42.1 | 548.587312 | 12366165.0 | 1.6 | 1.7 | 0.427 | 9.8 | 0 | 1 |
| 2937 | 2000 | 46.0 | 665.0 | 24 | 1.68 | 0.000000 | 79.0 | 1483 | 25.5 | 39 | ... | 78.0 | 43.5 | 547.358879 | 12222251.0 | 11.0 | 11.2 | 0.434 | 9.8 | 0 | 1 |
2938 rows × 22 columns
life_expectancy_df.isnull().sum()[np.where(life_expectancy_df.isnull().sum() != 0)[0]]
Life expectancy 10 Adult Mortality 10 Alcohol 194 Hepatitis B 553 BMI 34 Polio 19 Total expenditure 226 Diphtheria 19 GDP 448 Population 652 thinness 1-19 years 34 thinness 5-9 years 34 Income composition of resources 167 Schooling 163 dtype: int64
life_expectancy_df = life_expectancy_df.apply(lambda x: x.fillna(x.mean()),axis=0)
life_expectancy_df.isnull().sum()[np.where(life_expectancy_df.isnull().sum() != 0)[0]]
Series([], dtype: int64)
X = life_expectancy_df.drop(columns = ['Life expectancy '])
y = life_expectancy_df[['Life expectancy ']]
X
| Year | Adult Mortality | infant deaths | Alcohol | percentage expenditure | Hepatitis B | Measles | BMI | under-five deaths | Polio | ... | Diphtheria | HIV/AIDS | GDP | Population | thinness 1-19 years | thinness 5-9 years | Income composition of resources | Schooling | Status_Developed | Status_Developing | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2015 | 263.0 | 62 | 0.01 | 71.279624 | 65.0 | 1154 | 19.1 | 83 | 6.0 | ... | 65.0 | 0.1 | 584.259210 | 33736494.0 | 17.2 | 17.3 | 0.479 | 10.1 | 0 | 1 |
| 1 | 2014 | 271.0 | 64 | 0.01 | 73.523582 | 62.0 | 492 | 18.6 | 86 | 58.0 | ... | 62.0 | 0.1 | 612.696514 | 327582.0 | 17.5 | 17.5 | 0.476 | 10.0 | 0 | 1 |
| 2 | 2013 | 268.0 | 66 | 0.01 | 73.219243 | 64.0 | 430 | 18.1 | 89 | 62.0 | ... | 64.0 | 0.1 | 631.744976 | 31731688.0 | 17.7 | 17.7 | 0.470 | 9.9 | 0 | 1 |
| 3 | 2012 | 272.0 | 69 | 0.01 | 78.184215 | 67.0 | 2787 | 17.6 | 93 | 67.0 | ... | 67.0 | 0.1 | 669.959000 | 3696958.0 | 17.9 | 18.0 | 0.463 | 9.8 | 0 | 1 |
| 4 | 2011 | 275.0 | 71 | 0.01 | 7.097109 | 68.0 | 3013 | 17.2 | 97 | 68.0 | ... | 68.0 | 0.1 | 63.537231 | 2978599.0 | 18.2 | 18.2 | 0.454 | 9.5 | 0 | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2933 | 2004 | 723.0 | 27 | 4.36 | 0.000000 | 68.0 | 31 | 27.1 | 42 | 67.0 | ... | 65.0 | 33.6 | 454.366654 | 12777511.0 | 9.4 | 9.4 | 0.407 | 9.2 | 0 | 1 |
| 2934 | 2003 | 715.0 | 26 | 4.06 | 0.000000 | 7.0 | 998 | 26.7 | 41 | 7.0 | ... | 68.0 | 36.7 | 453.351155 | 12633897.0 | 9.8 | 9.9 | 0.418 | 9.5 | 0 | 1 |
| 2935 | 2002 | 73.0 | 25 | 4.43 | 0.000000 | 73.0 | 304 | 26.3 | 40 | 73.0 | ... | 71.0 | 39.8 | 57.348340 | 125525.0 | 1.2 | 1.3 | 0.427 | 10.0 | 0 | 1 |
| 2936 | 2001 | 686.0 | 25 | 1.72 | 0.000000 | 76.0 | 529 | 25.9 | 39 | 76.0 | ... | 75.0 | 42.1 | 548.587312 | 12366165.0 | 1.6 | 1.7 | 0.427 | 9.8 | 0 | 1 |
| 2937 | 2000 | 665.0 | 24 | 1.68 | 0.000000 | 79.0 | 1483 | 25.5 | 39 | 78.0 | ... | 78.0 | 43.5 | 547.358879 | 12222251.0 | 11.0 | 11.2 | 0.434 | 9.8 | 0 | 1 |
2938 rows × 21 columns
y
| Life expectancy | |
|---|---|
| 0 | 65.0 |
| 1 | 59.9 |
| 2 | 59.9 |
| 3 | 59.5 |
| 4 | 59.2 |
| ... | ... |
| 2933 | 44.3 |
| 2934 | 44.5 |
| 2935 | 44.8 |
| 2936 | 45.3 |
| 2937 | 46.0 |
2938 rows × 1 columns
X.shape
(2938, 21)
y.shape
(2938, 1)
# Convert the data type to float32
X = np.array(X).astype('float32')
y = np.array(y).astype('float32')
# Only take the numerical variables and scale them
X
array([[2.015e+03, 2.630e+02, 6.200e+01, ..., 1.010e+01, 0.000e+00,
1.000e+00],
[2.014e+03, 2.710e+02, 6.400e+01, ..., 1.000e+01, 0.000e+00,
1.000e+00],
[2.013e+03, 2.680e+02, 6.600e+01, ..., 9.900e+00, 0.000e+00,
1.000e+00],
...,
[2.002e+03, 7.300e+01, 2.500e+01, ..., 1.000e+01, 0.000e+00,
1.000e+00],
[2.001e+03, 6.860e+02, 2.500e+01, ..., 9.800e+00, 0.000e+00,
1.000e+00],
[2.000e+03, 6.650e+02, 2.400e+01, ..., 9.800e+00, 0.000e+00,
1.000e+00]], dtype=float32)
# split the data into test and train sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)
# Scale the data
from sklearn.preprocessing import StandardScaler
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)
# using linear regression model
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, accuracy_score
regresssion_model_sklearn = LinearRegression(fit_intercept = True)
regresssion_model_sklearn.fit(X_train, y_train)
LinearRegression()
regresssion_model_sklearn_accuracy = regresssion_model_sklearn.score(X_test, y_test)
regresssion_model_sklearn_accuracy
0.8340892521270633
print('Linear Model Coefficient (m): ', regresssion_model_sklearn.coef_)
print('Linear Model Coefficient (b): ', regresssion_model_sklearn.intercept_)
Linear Model Coefficient (m): [[-2.61061708e-03 -2.48314038e-01 1.30889893e+00 3.24076451e-02 1.29710045e-02 -2.60179937e-02 -2.49288455e-02 9.39102769e-02 -1.33250380e+00 6.82824329e-02 2.02552099e-02 9.44694877e-02 -2.55512834e-01 5.85425347e-02 -1.13813148e-03 -3.10914405e-02 -1.38065196e-03 1.18575670e-01 2.33237758e-01 2.63141207e-02 -2.63130777e-02]] Linear Model Coefficient (b): [-2.6998622e-08]
# Make prediction
y_predict = regresssion_model_sklearn.predict(X_test)
y_predict
array([[ 5.52710831e-01],
[-2.16832328e+00],
[ 1.67019397e-01],
[-2.60561049e-01],
[ 6.41681075e-01],
[ 9.02905345e-01],
[-1.15051806e+00],
[ 2.54885584e-01],
[-6.32353783e-01],
[-2.10694575e+00],
[ 8.92400503e-01],
[-3.87898594e-01],
[ 4.61621553e-01],
[ 3.12996089e-01],
[-2.75252271e+00],
[ 4.58067030e-01],
[ 3.05019468e-01],
[ 1.12625110e+00],
[ 7.49843001e-01],
[-1.09040451e+00],
[ 6.60644352e-01],
[ 1.71031833e-01],
[-3.84888314e-02],
[-8.21287632e-02],
[ 1.01489449e+00],
[ 3.93126875e-01],
[-1.03998697e+00],
[ 4.70234126e-01],
[ 6.57431364e-01],
[ 2.32481062e-01],
[-1.52942050e+00],
[-5.84325194e-01],
[-1.73759401e+00],
[ 2.12526619e-01],
[-1.20008457e+00],
[-5.68039119e-01],
[-1.86422586e-01],
[-2.44574212e-02],
[-3.50263268e-01],
[ 8.95781815e-02],
[ 4.71137226e-01],
[-9.68977809e-01],
[ 6.29414976e-01],
[ 1.71904892e-01],
[ 1.27129161e+00],
[-1.93901025e-02],
[-5.66162646e-01],
[-1.32934423e-02],
[-1.93406105e+00],
[-2.66107631e+00],
[ 9.77280855e-01],
[-2.06961229e-01],
[ 6.55054390e-01],
[ 5.82997799e-01],
[-1.65501207e-01],
[ 1.43740594e+00],
[-2.34375715e+00],
[-1.17040658e+00],
[-1.31018746e+00],
[ 3.55744928e-01],
[ 6.49780273e-01],
[ 5.06207407e-01],
[-5.09586275e-01],
[ 8.60826612e-01],
[ 5.91302872e-01],
[-4.77294289e-02],
[-2.68146825e+00],
[ 5.24610162e-01],
[ 4.77833807e-01],
[ 6.54073179e-01],
[ 8.04929674e-01],
[-1.73394442e+00],
[ 1.07581782e+00],
[-8.08958292e-01],
[ 2.67772347e-01],
[ 9.14890110e-01],
[-7.71501303e-01],
[ 1.49547547e-01],
[-4.25266894e-03],
[ 2.32207656e-01],
[-9.36321557e-01],
[ 3.47960321e-03],
[-2.11641264e+00],
[ 3.77019972e-01],
[-8.16926777e-01],
[ 1.24418724e+00],
[-1.72595656e+00],
[-6.13079548e-01],
[-5.74904859e-01],
[-5.00015281e-02],
[ 3.92089523e-02],
[ 4.12350595e-01],
[ 9.20182765e-02],
[ 2.11439073e-01],
[ 6.10208094e-01],
[ 3.12711537e-01],
[ 6.84671700e-01],
[ 7.47922838e-01],
[ 2.87155032e-01],
[-1.30575106e-01],
[-1.40194976e+00],
[ 1.07779717e+00],
[-5.79488814e-01],
[ 7.57264793e-01],
[ 5.97016573e-01],
[ 9.57629263e-01],
[-1.38976538e+00],
[-1.06413066e+00],
[ 9.54084814e-01],
[ 1.26774669e-01],
[ 3.49221565e-02],
[ 5.22761166e-01],
[ 1.17534131e-01],
[-1.07028754e-02],
[ 4.15537596e-01],
[-9.63635623e-01],
[-1.51113600e-01],
[-2.50926875e-02],
[ 3.06643546e-01],
[-3.09628509e-02],
[ 1.47919683e-02],
[ 1.13012493e+00],
[ 7.25224614e-01],
[-1.26446426e+00],
[-1.59179842e+00],
[-3.06014329e-01],
[ 1.22365415e+00],
[ 5.78518629e-01],
[-1.00798249e-01],
[ 7.60962963e-01],
[ 2.37028420e-01],
[ 8.12666357e-01],
[-1.79120839e-01],
[ 1.27772665e+00],
[-2.45546317e+00],
[ 6.60988927e-01],
[ 3.86145175e-01],
[-7.20776260e-01],
[ 7.69019365e-01],
[ 6.39683366e-01],
[-9.15374458e-01],
[ 2.61953861e-01],
[ 1.26638865e+00],
[ 4.21032578e-01],
[ 4.30117160e-01],
[ 1.48671806e-01],
[-8.91006649e-01],
[ 4.97806698e-01],
[ 2.05942899e-01],
[-1.02481306e-01],
[ 8.83263052e-02],
[-9.72267807e-01],
[ 7.84177303e-01],
[ 5.24665713e-01],
[-6.11437500e-01],
[-2.89502740e+00],
[ 7.83232391e-01],
[-6.30766630e-01],
[-3.76500463e+00],
[ 1.02772188e+00],
[-1.84252656e+00],
[-1.57603037e+00],
[ 9.14078474e-01],
[-8.02739322e-01],
[ 1.17295370e-01],
[ 1.04476988e+00],
[ 4.05269384e-01],
[-1.22281277e+00],
[ 3.78242970e-01],
[ 2.26409748e-01],
[-9.75948393e-01],
[-1.48905009e-01],
[ 9.07619715e-01],
[ 3.68190795e-01],
[-1.83150880e-02],
[-7.16013968e-01],
[-1.65445998e-01],
[-7.37916946e-01],
[ 9.09516633e-01],
[-8.74774754e-01],
[-1.34824872e+00],
[ 6.81117415e-01],
[ 2.51044720e-01],
[-2.00158864e-01],
[ 7.85416722e-01],
[ 8.66891444e-01],
[-1.40278518e+00],
[ 2.81088799e-01],
[-4.56558675e-01],
[ 7.10236430e-02],
[ 4.75339562e-01],
[-1.16799903e+00],
[ 6.48208380e-01],
[ 4.93626922e-01],
[-8.12274635e-01],
[-2.25451663e-01],
[ 1.14626014e+00],
[ 1.17455757e+00],
[ 5.95113970e-02],
[ 4.15205628e-01],
[ 4.15410906e-01],
[-1.70823634e+00],
[ 4.68985468e-01],
[ 1.54942662e-01],
[ 4.92507726e-01],
[-1.65442741e+00],
[-9.63012278e-01],
[-2.17627907e+00],
[-1.40059757e+00],
[ 7.88518786e-02],
[-2.85863131e-01],
[-1.09748232e+00],
[ 1.89638168e-01],
[ 1.07717133e+00],
[-3.52683330e+00],
[-8.49122465e-01],
[ 1.18430519e+00],
[ 6.69337928e-01],
[ 2.31849290e-02],
[ 7.77192652e-01],
[-1.11681783e+00],
[ 9.24960673e-01],
[-8.50136936e-01],
[ 4.67970103e-01],
[ 1.20765483e+00],
[ 1.48111999e+00],
[-8.41505378e-02],
[ 6.36683226e-01],
[ 1.45922542e-01],
[ 8.47810745e-01],
[-8.98610055e-02],
[ 2.85117179e-01],
[ 1.28646851e-01],
[-9.05312777e-01],
[ 1.30433738e+00],
[ 8.32333624e-01],
[ 9.10141945e-01],
[-1.95402175e-01],
[-1.06986034e+00],
[ 1.10821402e+00],
[-1.39241308e-01],
[-9.89909530e-01],
[ 4.89472039e-02],
[-8.52360368e-01],
[-1.43903768e+00],
[ 5.79346418e-01],
[-1.01157117e+00],
[ 9.05100226e-01],
[-5.16619347e-02],
[ 1.06320441e+00],
[ 5.11557497e-02],
[-9.22539949e-01],
[-2.03765082e+00],
[ 8.44934464e-01],
[ 3.15759063e-01],
[-4.06293988e-01],
[-1.04356468e+00],
[-2.78512120e+00],
[-1.85211599e+00],
[ 4.51204628e-01],
[ 3.96401733e-01],
[ 1.11832297e+00],
[ 9.92170632e-01],
[-6.61871016e-01],
[ 1.10089862e+00],
[ 2.65899934e-02],
[ 6.96750939e-01],
[ 3.60055603e-02],
[ 4.42502826e-01],
[ 1.32942498e+00],
[ 7.70188391e-01],
[ 1.51264572e+00],
[-8.45213473e-01],
[ 2.84633011e-01],
[-8.56528878e-02],
[-1.43650496e+00],
[ 3.03723395e-01],
[ 5.22436142e-01],
[ 1.22615063e+00],
[ 9.09931004e-01],
[ 1.31563878e+00],
[-1.16991671e-03],
[-9.98629868e-01],
[ 1.26976180e+00],
[-1.44195616e+00],
[ 2.98613280e-01],
[-1.20759571e+00],
[ 9.75366056e-01],
[ 5.55955827e-01],
[-1.57154047e+00],
[ 9.10902202e-01],
[-1.36684406e+00],
[ 2.91386276e-01],
[-1.53479159e+00],
[-2.16175056e+00],
[ 1.36270612e-01],
[ 4.71654087e-01],
[ 7.37280965e-01],
[ 7.30745971e-01],
[ 1.11952341e+00],
[-1.84391057e+00],
[ 5.42985797e-01],
[-1.24474466e-01],
[-2.90602416e-01],
[ 1.72621608e-01],
[-1.36526795e-02],
[ 6.73085868e-01],
[-2.06324434e+00],
[ 4.71875191e-01],
[ 4.62054253e-01],
[ 1.08057201e+00],
[-1.40729034e+00],
[-1.09038782e+00],
[ 1.17768094e-01],
[-4.19126116e-02],
[ 1.09365022e+00],
[ 9.87251997e-02],
[-1.74598300e+00],
[-1.46835864e+00],
[ 5.68061650e-01],
[ 1.11516511e+00],
[-1.08592176e+00],
[-9.20174122e-01],
[-3.20152044e+00],
[-1.53171623e+00],
[-1.12078726e+00],
[-3.76025379e-01],
[-5.68312466e-01],
[-1.62122548e-01],
[-1.22766709e+00],
[-1.12846345e-01],
[ 6.23853028e-01],
[ 1.02843094e+00],
[ 1.07945263e-01],
[ 1.70268431e-01],
[ 1.25617772e-01],
[ 7.11729288e-01],
[-3.25015783e+00],
[ 7.38437474e-02],
[-2.45575339e-01],
[ 1.16022122e+00],
[-3.59005123e-01],
[ 1.04464805e+00],
[-5.55668592e-01],
[ 1.09465992e+00],
[-8.17058623e-01],
[ 1.39014915e-01],
[-8.93750608e-01],
[-6.13648593e-01],
[ 3.55499715e-01],
[-1.82588071e-01],
[ 3.20608586e-01],
[-1.14689708e+00],
[-6.56674266e-01],
[ 1.29371798e+00],
[ 7.81974614e-01],
[-4.89241362e-01],
[-1.23924315e+00],
[-3.83279711e-01],
[-6.76795781e-01],
[ 1.59546471e+00],
[-8.35786402e-01],
[-6.45230234e-01],
[-1.17509282e+00],
[ 7.80216396e-01],
[ 1.74909979e-01],
[ 6.88567400e-01],
[ 8.38780850e-02],
[ 1.59270063e-01],
[-6.10351324e-01],
[ 1.00286996e+00],
[ 1.17401338e+00],
[ 4.32071596e-01],
[-1.63599527e+00],
[ 8.12521040e-01],
[ 7.92818785e-01],
[ 6.93403184e-02],
[-1.08170652e+00],
[-5.03658831e-01],
[-4.73706909e-02],
[-4.23268646e-01],
[ 3.12597491e-02],
[ 7.27069438e-01],
[ 9.52139199e-02],
[ 2.29732782e-01],
[ 9.33728456e-01],
[ 1.21199763e+00],
[ 1.35553539e+00],
[ 4.55510110e-01],
[ 7.30035126e-01],
[ 5.90461791e-01],
[-5.84788978e-01],
[ 9.15414095e-02],
[ 1.13066244e+00],
[-1.45133764e-01],
[ 9.65184987e-01],
[ 8.58497202e-01],
[ 4.20144677e-01],
[ 3.72954994e-01],
[-9.77192700e-01],
[-8.36410582e-01],
[ 8.20704937e-01],
[-7.74810493e-01],
[ 9.25964117e-01],
[ 5.09230912e-01],
[ 1.49525791e-01],
[ 6.64378464e-01],
[ 1.55767095e+00],
[ 1.16064572e+00],
[ 3.18306834e-01],
[ 4.40745264e-01],
[ 1.40767109e+00],
[-6.13686979e-01],
[ 2.03147084e-01],
[ 3.54101777e-01],
[-1.50797379e+00],
[ 3.18131596e-01],
[-1.37317801e+00],
[-1.29619809e-02],
[ 5.98721921e-01],
[-5.76220751e-01],
[-1.16058624e+00],
[ 1.26369905e+00],
[ 5.70825338e-01],
[ 7.75644004e-01],
[ 7.99267709e-01],
[ 4.39230651e-01],
[ 3.67545694e-01],
[ 1.12801111e+00],
[ 8.11241329e-01],
[ 1.63147950e+00],
[-1.67399645e+00],
[ 4.60181266e-01],
[-2.49360538e+00],
[-3.21395956e-02],
[ 1.04973352e+00],
[-3.01008511e+00],
[-3.31538647e-01],
[ 9.03196037e-01],
[-1.40111303e+00],
[ 7.18792498e-01],
[-1.54014075e+00],
[-1.30988419e-01],
[ 2.09673703e-01],
[-8.79785240e-01],
[-3.52250785e-01],
[-1.62849927e+00],
[ 8.13203275e-01],
[ 4.73319203e-01],
[ 5.47543526e-01],
[ 1.43717563e+00],
[-6.57539010e-01],
[ 1.80131523e-03],
[-1.02453232e+00],
[-6.03744350e-02],
[-5.91808915e-01],
[ 6.97400570e-01],
[-6.51037455e-01],
[ 6.21403992e-01],
[ 6.16609871e-01],
[ 8.89318705e-01],
[ 4.62804645e-01],
[ 1.91137537e-01],
[-4.52241033e-01],
[-3.87682676e+00],
[-5.14699399e-01],
[ 4.73519824e-02],
[-4.67809401e-02],
[-9.39416349e-01],
[ 1.26614523e+00],
[ 5.33868432e-01],
[ 8.69098723e-01],
[ 5.28575540e-01],
[ 4.38616961e-01],
[ 7.70562053e-01],
[-1.46373487e+00],
[ 4.50699151e-01],
[ 1.96949631e-01],
[ 6.12041712e-01],
[ 7.64083028e-01],
[-3.52741361e+00],
[ 1.45277262e-01],
[ 1.65612787e-01],
[ 4.38533425e-01],
[-2.48667645e+00],
[ 1.92773968e-01],
[ 4.88228410e-01],
[ 9.40659583e-01],
[-1.02997154e-01],
[ 4.82853390e-02],
[-1.28369749e+00],
[ 4.70904440e-01],
[-9.93004322e-01],
[ 1.41880476e+00],
[-3.03581744e-01],
[ 6.51034653e-01],
[ 7.43983090e-01],
[ 1.19432032e+00],
[ 6.92491651e-01],
[ 8.07432175e-01],
[-9.82738376e-01],
[ 9.22874391e-01],
[ 6.67386532e-01],
[ 1.28300339e-01],
[ 1.52864009e-01],
[ 4.68493134e-01],
[-2.14909577e+00],
[-4.78457987e-01],
[ 3.02116156e-01],
[-8.83207917e-01],
[ 6.83480680e-01],
[ 2.82940604e-02],
[-2.89943755e-01],
[ 7.12881625e-01],
[ 1.29372728e+00],
[-1.45629495e-01],
[ 9.66929495e-02],
[-2.03770113e+00],
[ 4.70364362e-01],
[ 2.04002596e-02],
[ 1.28312588e+00],
[ 2.34469458e-01],
[-1.00727707e-01],
[ 3.97281684e-02],
[-1.65420103e+00],
[ 1.51657537e-01],
[ 1.04991764e-01],
[ 6.52542591e-01],
[-6.94869339e-01],
[ 7.68318832e-01],
[-2.69931525e-01],
[-2.84774117e-02],
[ 2.28126213e-01],
[ 9.32809949e-01],
[-2.49768585e-01],
[ 1.96041882e-01],
[ 6.80984080e-01],
[ 2.54315346e-01],
[-1.85379517e+00],
[-8.01015198e-01],
[ 4.35611635e-01],
[ 1.02972734e+00],
[-3.54356766e-01],
[-1.18666327e+00],
[ 4.95746523e-01],
[-9.84485805e-01],
[ 3.38030964e-01],
[-1.34528124e+00],
[ 6.79492831e-01],
[ 1.31230009e+00],
[ 2.68432260e-01],
[ 3.62544395e-02],
[ 1.18392631e-01],
[ 7.56380737e-01],
[ 8.19890374e-06],
[ 2.67845720e-01],
[-1.88982773e+00],
[-1.96749878e+00],
[ 3.13791670e-02],
[-6.87573671e-01],
[-1.71962702e+00],
[-3.93082786e+00],
[ 1.52391875e+00],
[-2.10408717e-01],
[ 5.26753783e-01],
[ 1.66754222e+00],
[-2.53525615e-01],
[-7.00067699e-01],
[ 7.59080768e-01],
[ 3.82731289e-01],
[ 6.41299069e-01],
[-2.95760989e+00],
[ 6.11043513e-01],
[ 6.03483140e-01],
[ 3.93115103e-01],
[ 1.07154775e+00],
[ 4.79877323e-01],
[ 1.27206719e+00],
[ 4.12568241e-01],
[ 5.83164155e-01],
[ 1.12685755e-01],
[-9.65158761e-01],
[-8.67547333e-01],
[ 3.75361890e-01],
[ 4.01545793e-01],
[ 5.47808230e-01],
[ 7.25134730e-01],
[-5.31004846e-01]], dtype=float32)
# Plot the scaled result
plt.plot(y_test, y_predict, "^", color = 'b')
plt.xlabel('Model Predictions')
plt.ylabel('True Values')
Text(0, 0.5, 'True Values')
y_predict_orig = scaler_y.inverse_transform(y_predict)
y_test_orig = scaler_y.inverse_transform(y_test)
# Plot the original values
plt.plot(y_test_orig, y_predict_orig, "^", color = 'b')
plt.xlabel('Model Predictions')
plt.ylabel('True Values')
Text(0, 0.5, 'True Values')
# Plot the KPIs
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from math import sqrt
k = X_test.shape[1]
n = len(X_test)
RMSE = float(format(np.sqrt(mean_squared_error(y_test_orig, y_predict_orig)),'.3f'))
MSE = mean_squared_error(y_test_orig, y_predict_orig)
MAE = mean_absolute_error(y_test_orig, y_predict_orig)
r2 = r2_score(y_test_orig, y_predict_orig)
adj_r2 = 1-(1-r2)*(n-1)/(n-k-1)
print('RMSE =',RMSE, '\nMSE =',MSE, '\nMAE =',MAE, '\nR2 =', r2, '\nAdjusted R2 =', adj_r2)
RMSE = 3.993 MSE = 15.946937 MAE = 2.9852057 R2 = 0.8340892506152433 Adjusted R2 = 0.8279335514331234
!jupyter nbconvert .ipynb --to html